Data Prep and Display

source("ozone_krige.R")
# get elevation from DEM data. 
# 1. Ozone with Temperature
# 2. Ozone with distance to nearest A1 road
# 3. Ozone with total length of all A1 roads within 500 m
# 4. Ozone with temperature and RH
# 5. Ozone with temperature and distance to nearest A1 road
# 6. Ozone with temperature and elevation
# 7. Ozone with temperature and elevation and distance to nearest A1 road.

path_to_data_folder="../data/krige_data/"
added_tiffs_list = list.files(path_to_data_folder)

# Adding Max Relative Humidity
o3_data_to_add = raster(paste0(path_to_data_folder,added_tiffs_list[8]))
added_data_projected = raster::projectRaster(o3_data_to_add, crs=prg)
o3_projected$rh=round(raster::extract(added_data_projected,o3_projected),2)

# Adding Temperature
o3_data_to_add = raster(paste0(path_to_data_folder,added_tiffs_list[14]))
added_data_projected1 = raster::projectRaster(o3_data_to_add, crs=prg)
o3_projected$max_temp=round(raster::extract(added_data_projected1,o3_projected),2)

plot(added_data_projected, main= "Maximum Relative Humidity")
plot(den_projected, col="red", add=TRUE)
plot(o3_projected, col="green", add=TRUE)
plot(o3_500m, col="blue", add=TRUE)

plot(added_data_projected1, main="Maximum Temperature")
plot(den_projected, col="red", add=TRUE)
plot(o3_projected, col="green", add=TRUE)
plot(o3_500m, col="blue", add=TRUE)

head(cbind(o3_projected$site_name,o3_projected$rh,o3_projected$elev))
##      [,1]                                    [,2]   
## [1,] "Aurora East"                           "71.64"
## [2,] "Boulder Reservoir"                     "62.7" 
## [3,] "Denver - Camp"                         "69.21"
## [4,] "Highland Reservoir"                    "70.22"
## [5,] "La Casa"                               "69.21"
## [6,] "National Renewable Energy Labs - Nrel" "66.11"
# join elevation column back to o3_projected
# o3_projected = merge(x=o3_projected,y=o3_elevs,by="site_name", duplicateGeoms = TRUE)
#o3_projected
year_o3 = as.data.frame(o3_projected)
year_o3 = year_o3 %>% 
  dplyr::select(c("site_name","max_temp","rh","long","lat"),everything())

# use to create dataframe of specific months, ex below is summer
# summer_o3 = year_o3 %>%
#   dplyr::select(contains(c("site_name","lat","long","elevation","Apr","May","Jun","Jul","Aug","Sep","Oct")))

# preview data
#summer_o3

Variograms

# # create months as spatial object
coordinates(year_o3) = c('long', 'lat')
proj4string(year_o3) = CRS(prg)
year_o3 = spTransform(year_o3, CRS(prg))
head(year_o3,3)
##           site_name max_temp    rh Jan.2018 Feb.2018 Mar.2018 Apr.2018 May.2018
## 1       Aurora East   283.31 71.64 33.91380 35.42312 43.28439 45.43727 47.59394
## 2 Boulder Reservoir   283.68 62.70 25.40463 33.18519 39.25994 43.20485 45.02353
## 3     Denver - Camp   283.87 69.21 14.37833 18.61761 30.82303 37.63830 39.23152
##   Jun.2018 Jul.2018 Aug.2018 Sep.2018 Oct.2018 Nov.2018 Dec.2018 Jan.2019
## 1 49.61276 53.10755 49.24113 39.17840 30.83868 32.31832 33.45175 36.43486
## 2 47.42883 51.86723 50.07590 39.28230 29.57180 29.20973 25.00410 27.13100
## 3 42.09934 47.16900 43.54900 31.02263 18.98047 18.84029 16.89985 15.65961
##   Feb.2019 Mar.2019 Apr.2019 May.2019 Jun.2019 Jul.2019 Aug.2019 Sep.2019
## 1 34.61000 42.00193 44.19620 43.09537 47.44900 47.23152 49.84026 44.01821
## 2 28.66670 39.33206 41.20779 39.27813 45.88648 45.10003 46.66984 41.79714
## 3 17.71459 28.87100 34.35880 33.96248 41.05390 42.33920 44.24339 34.86274
##   Oct.2019 Nov.2019 Dec.2019 Jan.2020 Feb.2020 Mar.2020 Apr.2020 May.2020
## 1 36.47442 31.85114 35.84944 36.00476 41.06897 38.94796 43.68759 43.44597
## 2 30.64510 26.22540 25.08240 31.69583 37.79107 35.38526 42.57246 42.62386
## 3 23.14317 14.39010 14.60787 20.12061 24.56533 28.45497 35.50503 37.67923
##   Jun.2020 Jul.2020 Aug.2020 Sep.2020 Oct.2020 Nov.2020 Dec.2020 Jan.2021
## 1 45.87060 46.22943 52.29220 42.18900 34.69803 32.73327 31.85921 32.67847
## 2 44.00843 45.94503 51.04083 39.21789 30.74184 29.11103 27.92811 28.62619
## 3 38.68769 42.28003 47.25843 33.14123 22.77806 19.19072 18.32052 17.06645
##   Feb.2021 Mar.2021 Apr.2021 May.2021 Jun.2021 Jul.2021 Aug.2021 Sep.2021
## 1 34.34856 42.11761 42.39020 41.45265 48.53647 57.07354 54.34539 48.65968
## 2 33.58389 41.26863 41.80321 38.93147 47.66077 54.87920 51.63948 46.89257
## 3 20.19170 27.83537 31.57519 32.21843 42.81380 52.45355 47.52255 38.31757
##   Oct.2021 Nov.2021 Dec.2021 Jan.2022 Feb.2022 Mar.2022 Apr.2022 May.2022
## 1 39.76661 34.70000 35.92747 36.46494 40.82989 41.74003 45.83776 45.38309
## 2 35.05306 31.56586 32.25159 28.76470 37.26468 38.31506 44.90980 43.12134
## 3 26.62816 20.11483 18.54703 15.47632 22.13868 25.87955 37.05690 39.51997
##   Jun.2022 Jul.2022 Aug.2022 Sep.2022
## 1 48.22110 50.52435 50.38516 43.08321
## 2 46.99371 46.86719 47.07507 38.00400
## 3 44.72353 47.19168 48.07397 35.00441
# create list of column month names 
cols = names(year_o3)[-c(1:3)]

# create empty list to store variograms
vgms = vector("list")

# loop thru columns
# var name = name of each month
# reformulate to do month~1 -> this is oridinary/simple kriging
# store in vgms, -> add models
for(i in seq_along(cols)) {
  var_name = cols[i]
  v = autofitVariogram(reformulate("max_temp+rh",var_name),year_o3)
  vgms[[i]] = v
}
# vgms

for(i in seq_along(cols)) {
  plot(vgms[[i]])
}

Krige and Error Results

# # create empty list to store information
# kriges = vector("list")
# kriges.cv = vector("list")
# rmses = vector("list")
# preds = vector("list")
# vars = vector("list")
# 
# # loop thru columns
# # reformulate to create formula based on var_name for each month
# # krige + store
# # krige.cv and rmse + store
# for(i in seq_along(cols)) {
#   var_name = cols[i]
#   k = autoKrige(reformulate("max_temp+rh",var_name),year_o3,grd, model = c("Ste","Sph","Mat","Exp","Nug","Gau","Lin"))
#   k.cv = autoKrige.cv(reformulate("max_temp+rh",var_name),year_o3,model = c("Ste","Sph","Mat","Exp","Nug","Gau","Lin"))
#   rmse = sqrt(mean(k.cv$krige.cv_output$residual^2))
#   mean_mth_pred = mean(k$krige_output$var1.pred)
#   mean_mth_var = mean(k$krige_output$var1.var)
#   kriges[[i]] =  k
#   kriges.cv[[i]] = k.cv
#   rmses[i] = rmse
#   preds[i] = mean_mth_pred
#   vars[i] = mean_mth_var
# }
# 
# # plot
# for(i in seq_along(cols)) {
#   month = names(year_o3[,(2+i)])
#   plot(kriges[[i]], ylab = "Semivariance", xlab = month)
# }
# 
# # add RMSE, predictions, and variance to gt table
# rmse_tibble = tibble(month_year = cols,
#                      rmse = rmses,
#                      mean_predicted_o3 = preds,
#                      mean_variance_o3 = vars)
# rmse_tibble
# rmse_gt = gt(rmse_tibble)
# rmse_gt